{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Table of Contents\n",
    " <p><div class=\"lev1\"><a href=\"#Example-of-GibbsLDA-and-vbLDA\"><span class=\"toc-item-num\">1 - </span>Example of GibbsLDA and vbLDA</a></div><div class=\"lev2\"><a href=\"#Loading-Reuter-corpus-from-NLTK\"><span class=\"toc-item-num\">1.1 - </span>Loading Reuter corpus from NLTK</a></div><div class=\"lev2\"><a href=\"#Inferencen-through-the-Gibbs-sampling\"><span class=\"toc-item-num\">1.2 - </span>Inferencen through the Gibbs sampling</a></div><div class=\"lev3\"><a href=\"#Print-top-10-probability-words-for-each-topic\"><span class=\"toc-item-num\">1.2.1 - </span>Print top 10 probability words for each topic</a></div><div class=\"lev2\"><a href=\"#Inferencen-through-the-Variational-Bayes\"><span class=\"toc-item-num\">1.3 - </span>Inferencen through the Variational Bayes</a></div><div class=\"lev3\"><a href=\"#Print-top-10-probability-words-for-each-topic\"><span class=\"toc-item-num\">1.3.1 - </span>Print top 10 probability words for each topic</a></div>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Example of GibbsLDA and vbLDA"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This example requires three NLTK corpora to be installed: `nltk.corpus.reuters`, `nltk.corpus.words`, and `nltk.corpus.stopwords`.\n",
    "\n",
    "You can download the corpora via `nltk.download()`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import logging\n",
    "\n",
    "import numpy as np\n",
    "from ptm import GibbsLDA\n",
    "from ptm import vbLDA\n",
    "from ptm.nltk_corpus import get_reuters_ids_cnt\n",
    "from ptm.utils import convert_cnt_to_list, get_top_words"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Loading Reuter corpus from NLTK"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Load 1000 documents from the NLTK Reuters corpus, with a maximum vocabulary size of 10000."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Vocabulary size:4632\n"
     ]
    }
   ],
   "source": [
    "n_doc = 1000\n",
    "\n",
    "# Fetch the vocabulary plus the per-document word ids and counts from the NLTK\n",
    "# Reuters corpus, requesting n_doc documents and capping the vocabulary at 10000.\n",
    "voca, doc_ids, doc_cnt = get_reuters_ids_cnt(num_doc=n_doc, max_voca=10000)\n",
    "n_voca = len(voca)\n",
    "\n",
    "# GibbsLDA.fit consumes `docs`; vbLDA.fit consumes doc_ids/doc_cnt directly.\n",
    "docs = convert_cnt_to_list(doc_ids, doc_cnt)\n",
    "\n",
    "print('Vocabulary size:{:d}'.format(n_voca))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<a id=\"Inferencen-through-the-Gibbs-sampling\"></a>\n",
    "\n",
    "## Inference through Gibbs sampling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2016-02-10 19:42:01 INFO:GibbsLDA:[ITER] 0,\telapsed time:0.86,\tlog_likelihood:-447909.18\n",
      "2016-02-10 19:42:02 INFO:GibbsLDA:[ITER] 1,\telapsed time:0.89,\tlog_likelihood:-421738.22\n",
      "2016-02-10 19:42:03 INFO:GibbsLDA:[ITER] 2,\telapsed time:0.94,\tlog_likelihood:-405181.71\n",
      "2016-02-10 19:42:04 INFO:GibbsLDA:[ITER] 3,\telapsed time:0.87,\tlog_likelihood:-393867.42\n",
      "2016-02-10 19:42:05 INFO:GibbsLDA:[ITER] 4,\telapsed time:0.90,\tlog_likelihood:-385570.47\n",
      "2016-02-10 19:42:06 INFO:GibbsLDA:[ITER] 5,\telapsed time:0.90,\tlog_likelihood:-379114.11\n",
      "2016-02-10 19:42:07 INFO:GibbsLDA:[ITER] 6,\telapsed time:0.92,\tlog_likelihood:-374416.99\n",
      "2016-02-10 19:42:08 INFO:GibbsLDA:[ITER] 7,\telapsed time:0.90,\tlog_likelihood:-371338.53\n",
      "2016-02-10 19:42:09 INFO:GibbsLDA:[ITER] 8,\telapsed time:0.88,\tlog_likelihood:-368035.03\n",
      "2016-02-10 19:42:10 INFO:GibbsLDA:[ITER] 9,\telapsed time:0.93,\tlog_likelihood:-365556.67\n",
      "2016-02-10 19:42:11 INFO:GibbsLDA:[ITER] 10,\telapsed time:0.87,\tlog_likelihood:-363627.94\n",
      "2016-02-10 19:42:11 INFO:GibbsLDA:[ITER] 11,\telapsed time:0.84,\tlog_likelihood:-362118.57\n",
      "2016-02-10 19:42:12 INFO:GibbsLDA:[ITER] 12,\telapsed time:0.83,\tlog_likelihood:-360546.38\n",
      "2016-02-10 19:42:13 INFO:GibbsLDA:[ITER] 13,\telapsed time:0.85,\tlog_likelihood:-359183.30\n",
      "2016-02-10 19:42:14 INFO:GibbsLDA:[ITER] 14,\telapsed time:0.96,\tlog_likelihood:-358050.27\n",
      "2016-02-10 19:42:15 INFO:GibbsLDA:[ITER] 15,\telapsed time:0.92,\tlog_likelihood:-357094.26\n",
      "2016-02-10 19:42:16 INFO:GibbsLDA:[ITER] 16,\telapsed time:0.87,\tlog_likelihood:-356045.67\n",
      "2016-02-10 19:42:17 INFO:GibbsLDA:[ITER] 17,\telapsed time:0.84,\tlog_likelihood:-355085.27\n",
      "2016-02-10 19:42:18 INFO:GibbsLDA:[ITER] 18,\telapsed time:0.85,\tlog_likelihood:-354129.45\n",
      "2016-02-10 19:42:18 INFO:GibbsLDA:[ITER] 19,\telapsed time:0.84,\tlog_likelihood:-353360.71\n",
      "2016-02-10 19:42:19 INFO:GibbsLDA:[ITER] 20,\telapsed time:0.90,\tlog_likelihood:-352636.22\n",
      "2016-02-10 19:42:20 INFO:GibbsLDA:[ITER] 21,\telapsed time:0.87,\tlog_likelihood:-352033.27\n",
      "2016-02-10 19:42:21 INFO:GibbsLDA:[ITER] 22,\telapsed time:0.84,\tlog_likelihood:-351298.31\n",
      "2016-02-10 19:42:22 INFO:GibbsLDA:[ITER] 23,\telapsed time:0.84,\tlog_likelihood:-351056.08\n",
      "2016-02-10 19:42:23 INFO:GibbsLDA:[ITER] 24,\telapsed time:0.83,\tlog_likelihood:-350554.31\n",
      "2016-02-10 19:42:24 INFO:GibbsLDA:[ITER] 25,\telapsed time:0.85,\tlog_likelihood:-350214.01\n",
      "2016-02-10 19:42:25 INFO:GibbsLDA:[ITER] 26,\telapsed time:0.84,\tlog_likelihood:-350201.01\n",
      "2016-02-10 19:42:25 INFO:GibbsLDA:[ITER] 27,\telapsed time:0.85,\tlog_likelihood:-349730.70\n",
      "2016-02-10 19:42:26 INFO:GibbsLDA:[ITER] 28,\telapsed time:0.91,\tlog_likelihood:-349007.47\n",
      "2016-02-10 19:42:27 INFO:GibbsLDA:[ITER] 29,\telapsed time:0.85,\tlog_likelihood:-349175.12\n",
      "2016-02-10 19:42:28 INFO:GibbsLDA:[ITER] 30,\telapsed time:0.88,\tlog_likelihood:-348863.94\n",
      "2016-02-10 19:42:29 INFO:GibbsLDA:[ITER] 31,\telapsed time:0.85,\tlog_likelihood:-348612.34\n",
      "2016-02-10 19:42:30 INFO:GibbsLDA:[ITER] 32,\telapsed time:0.90,\tlog_likelihood:-347934.48\n",
      "2016-02-10 19:42:31 INFO:GibbsLDA:[ITER] 33,\telapsed time:0.97,\tlog_likelihood:-347867.02\n",
      "2016-02-10 19:42:32 INFO:GibbsLDA:[ITER] 34,\telapsed time:0.95,\tlog_likelihood:-347414.72\n",
      "2016-02-10 19:42:33 INFO:GibbsLDA:[ITER] 35,\telapsed time:0.86,\tlog_likelihood:-347418.91\n",
      "2016-02-10 19:42:34 INFO:GibbsLDA:[ITER] 36,\telapsed time:0.96,\tlog_likelihood:-347124.65\n",
      "2016-02-10 19:42:35 INFO:GibbsLDA:[ITER] 37,\telapsed time:0.84,\tlog_likelihood:-346625.26\n",
      "2016-02-10 19:42:35 INFO:GibbsLDA:[ITER] 38,\telapsed time:0.83,\tlog_likelihood:-346294.68\n",
      "2016-02-10 19:42:36 INFO:GibbsLDA:[ITER] 39,\telapsed time:0.86,\tlog_likelihood:-346413.61\n",
      "2016-02-10 19:42:37 INFO:GibbsLDA:[ITER] 40,\telapsed time:0.98,\tlog_likelihood:-346242.04\n",
      "2016-02-10 19:42:38 INFO:GibbsLDA:[ITER] 41,\telapsed time:0.89,\tlog_likelihood:-346290.64\n",
      "2016-02-10 19:42:39 INFO:GibbsLDA:[ITER] 42,\telapsed time:0.86,\tlog_likelihood:-346108.81\n",
      "2016-02-10 19:42:40 INFO:GibbsLDA:[ITER] 43,\telapsed time:0.96,\tlog_likelihood:-345780.29\n",
      "2016-02-10 19:42:41 INFO:GibbsLDA:[ITER] 44,\telapsed time:0.91,\tlog_likelihood:-345771.55\n",
      "2016-02-10 19:42:42 INFO:GibbsLDA:[ITER] 45,\telapsed time:0.85,\tlog_likelihood:-345758.19\n",
      "2016-02-10 19:42:43 INFO:GibbsLDA:[ITER] 46,\telapsed time:0.95,\tlog_likelihood:-345798.00\n",
      "2016-02-10 19:42:44 INFO:GibbsLDA:[ITER] 47,\telapsed time:0.95,\tlog_likelihood:-345794.09\n",
      "2016-02-10 19:42:45 INFO:GibbsLDA:[ITER] 48,\telapsed time:0.95,\tlog_likelihood:-345631.54\n",
      "2016-02-10 19:42:46 INFO:GibbsLDA:[ITER] 49,\telapsed time:0.89,\tlog_likelihood:-345489.19\n",
      "2016-02-10 19:42:47 INFO:GibbsLDA:[ITER] 50,\telapsed time:1.01,\tlog_likelihood:-345386.81\n",
      "2016-02-10 19:42:48 INFO:GibbsLDA:[ITER] 51,\telapsed time:0.93,\tlog_likelihood:-345105.51\n",
      "2016-02-10 19:42:49 INFO:GibbsLDA:[ITER] 52,\telapsed time:0.95,\tlog_likelihood:-345095.54\n",
      "2016-02-10 19:42:49 INFO:GibbsLDA:[ITER] 53,\telapsed time:0.89,\tlog_likelihood:-344779.87\n",
      "2016-02-10 19:42:50 INFO:GibbsLDA:[ITER] 54,\telapsed time:0.91,\tlog_likelihood:-344897.46\n",
      "2016-02-10 19:42:51 INFO:GibbsLDA:[ITER] 55,\telapsed time:0.91,\tlog_likelihood:-344546.15\n",
      "2016-02-10 19:42:52 INFO:GibbsLDA:[ITER] 56,\telapsed time:0.88,\tlog_likelihood:-344541.70\n",
      "2016-02-10 19:42:53 INFO:GibbsLDA:[ITER] 57,\telapsed time:0.89,\tlog_likelihood:-344516.72\n",
      "2016-02-10 19:42:54 INFO:GibbsLDA:[ITER] 58,\telapsed time:0.94,\tlog_likelihood:-344702.70\n",
      "2016-02-10 19:42:55 INFO:GibbsLDA:[ITER] 59,\telapsed time:0.94,\tlog_likelihood:-344196.74\n",
      "2016-02-10 19:42:56 INFO:GibbsLDA:[ITER] 60,\telapsed time:0.90,\tlog_likelihood:-344231.88\n",
      "2016-02-10 19:42:57 INFO:GibbsLDA:[ITER] 61,\telapsed time:0.89,\tlog_likelihood:-344436.79\n",
      "2016-02-10 19:42:58 INFO:GibbsLDA:[ITER] 62,\telapsed time:0.88,\tlog_likelihood:-343805.88\n",
      "2016-02-10 19:42:59 INFO:GibbsLDA:[ITER] 63,\telapsed time:0.91,\tlog_likelihood:-344083.66\n",
      "2016-02-10 19:43:00 INFO:GibbsLDA:[ITER] 64,\telapsed time:0.91,\tlog_likelihood:-344131.20\n",
      "2016-02-10 19:43:00 INFO:GibbsLDA:[ITER] 65,\telapsed time:0.87,\tlog_likelihood:-344327.27\n",
      "2016-02-10 19:43:01 INFO:GibbsLDA:[ITER] 66,\telapsed time:0.85,\tlog_likelihood:-343887.94\n",
      "2016-02-10 19:43:02 INFO:GibbsLDA:[ITER] 67,\telapsed time:0.84,\tlog_likelihood:-343762.63\n",
      "2016-02-10 19:43:03 INFO:GibbsLDA:[ITER] 68,\telapsed time:0.93,\tlog_likelihood:-343623.01\n",
      "2016-02-10 19:43:04 INFO:GibbsLDA:[ITER] 69,\telapsed time:0.89,\tlog_likelihood:-343498.40\n",
      "2016-02-10 19:43:05 INFO:GibbsLDA:[ITER] 70,\telapsed time:0.87,\tlog_likelihood:-343147.74\n",
      "2016-02-10 19:43:06 INFO:GibbsLDA:[ITER] 71,\telapsed time:0.85,\tlog_likelihood:-343025.72\n",
      "2016-02-10 19:43:07 INFO:GibbsLDA:[ITER] 72,\telapsed time:0.87,\tlog_likelihood:-343189.09\n",
      "2016-02-10 19:43:08 INFO:GibbsLDA:[ITER] 73,\telapsed time:0.89,\tlog_likelihood:-343104.90\n",
      "2016-02-10 19:43:09 INFO:GibbsLDA:[ITER] 74,\telapsed time:0.88,\tlog_likelihood:-343020.70\n",
      "2016-02-10 19:43:09 INFO:GibbsLDA:[ITER] 75,\telapsed time:0.91,\tlog_likelihood:-342822.27\n",
      "2016-02-10 19:43:10 INFO:GibbsLDA:[ITER] 76,\telapsed time:0.86,\tlog_likelihood:-342671.10\n",
      "2016-02-10 19:43:11 INFO:GibbsLDA:[ITER] 77,\telapsed time:0.87,\tlog_likelihood:-342537.95\n",
      "2016-02-10 19:43:12 INFO:GibbsLDA:[ITER] 78,\telapsed time:0.88,\tlog_likelihood:-342711.56\n",
      "2016-02-10 19:43:13 INFO:GibbsLDA:[ITER] 79,\telapsed time:0.87,\tlog_likelihood:-342544.57\n",
      "2016-02-10 19:43:14 INFO:GibbsLDA:[ITER] 80,\telapsed time:0.88,\tlog_likelihood:-342719.10\n",
      "2016-02-10 19:43:15 INFO:GibbsLDA:[ITER] 81,\telapsed time:0.92,\tlog_likelihood:-342605.74\n",
      "2016-02-10 19:43:16 INFO:GibbsLDA:[ITER] 82,\telapsed time:0.87,\tlog_likelihood:-342609.81\n",
      "2016-02-10 19:43:17 INFO:GibbsLDA:[ITER] 83,\telapsed time:0.90,\tlog_likelihood:-342740.90\n",
      "2016-02-10 19:43:18 INFO:GibbsLDA:[ITER] 84,\telapsed time:0.89,\tlog_likelihood:-342668.54\n",
      "2016-02-10 19:43:18 INFO:GibbsLDA:[ITER] 85,\telapsed time:0.89,\tlog_likelihood:-342678.21\n",
      "2016-02-10 19:43:19 INFO:GibbsLDA:[ITER] 86,\telapsed time:0.87,\tlog_likelihood:-342797.02\n",
      "2016-02-10 19:43:20 INFO:GibbsLDA:[ITER] 87,\telapsed time:0.92,\tlog_likelihood:-342652.20\n",
      "2016-02-10 19:43:21 INFO:GibbsLDA:[ITER] 88,\telapsed time:0.89,\tlog_likelihood:-342328.18\n",
      "2016-02-10 19:43:22 INFO:GibbsLDA:[ITER] 89,\telapsed time:0.88,\tlog_likelihood:-342428.68\n",
      "2016-02-10 19:43:23 INFO:GibbsLDA:[ITER] 90,\telapsed time:0.90,\tlog_likelihood:-342853.29\n",
      "2016-02-10 19:43:24 INFO:GibbsLDA:[ITER] 91,\telapsed time:0.87,\tlog_likelihood:-342336.00\n",
      "2016-02-10 19:43:25 INFO:GibbsLDA:[ITER] 92,\telapsed time:0.89,\tlog_likelihood:-342357.74\n",
      "2016-02-10 19:43:26 INFO:GibbsLDA:[ITER] 93,\telapsed time:0.89,\tlog_likelihood:-341976.18\n",
      "2016-02-10 19:43:27 INFO:GibbsLDA:[ITER] 94,\telapsed time:0.93,\tlog_likelihood:-342270.78\n",
      "2016-02-10 19:43:28 INFO:GibbsLDA:[ITER] 95,\telapsed time:0.94,\tlog_likelihood:-342271.96\n",
      "2016-02-10 19:43:29 INFO:GibbsLDA:[ITER] 96,\telapsed time:0.94,\tlog_likelihood:-342092.68\n",
      "2016-02-10 19:43:30 INFO:GibbsLDA:[ITER] 97,\telapsed time:0.92,\tlog_likelihood:-341932.06\n",
      "2016-02-10 19:43:30 INFO:GibbsLDA:[ITER] 98,\telapsed time:0.90,\tlog_likelihood:-342061.92\n",
      "2016-02-10 19:43:31 INFO:GibbsLDA:[ITER] 99,\telapsed time:0.89,\tlog_likelihood:-341768.40\n"
     ]
    }
   ],
   "source": [
    "max_iter = 100\n",
    "n_topic = 10\n",
    "\n",
    "# Seed NumPy's global RNG so the sampler's results are repeatable across\n",
    "# Restart & Run All. NOTE(review): assumes ptm draws from np.random --\n",
    "# confirm against the ptm source.\n",
    "np.random.seed(0)\n",
    "\n",
    "# Stop 'GibbsLDA' log records from propagating to ancestor loggers.\n",
    "logger = logging.getLogger('GibbsLDA')\n",
    "logger.propagate = False\n",
    "\n",
    "# Pass n_voca (computed when the corpus was loaded) instead of recomputing\n",
    "# len(voca), matching how the vbLDA model is constructed later on.\n",
    "model = GibbsLDA(n_doc, n_voca, n_topic)\n",
    "model.fit(docs, max_iter=max_iter)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Print top 10 probability words for each topic"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Topic 0 :  market,bank,week,rate,rose,money,two,rise,three,fed\n",
      "Topic 1 :  quarter,first,april,record,earnings,dividend,share,prior,may,one\n",
      "Topic 2 :  oil,dome,one,debt,gas,price,plan,new,would,energy\n",
      "Topic 3 :  nil,stocks,production,total,end,use,start,soybean,supply,demand\n",
      "Topic 4 :  last,month,wheat,crop,grain,department,sugar,april,week,export\n",
      "Topic 5 :  loss,profit,corp,note,tax,chemical,gain,quarter,nine,operating\n",
      "Topic 6 :  trade,government,last,also,deficit,would,surplus,foreign,canada,industry\n",
      "Topic 7 :  japan,would,could,economic,japanese,market,west,growth,meeting,policy\n",
      "Topic 8 :  dollar,bank,yen,interest,exchange,term,days,currency,rate,current\n",
      "Topic 9 :  share,offer,stock,corp,acquisition,would,group,common,also,cash\n"
     ]
    }
   ],
   "source": [
    "# List the 10 words get_top_words returns for each Gibbs-sampled topic,\n",
    "# one comma-separated line per topic.\n",
    "for topic_idx in range(n_topic):\n",
    "    words = get_top_words(model.TW, voca, topic_idx, n_words=10)\n",
    "    print('Topic %d :  %s' % (topic_idx, ','.join(words)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<a id=\"Inferencen-through-the-Variational-Bayes\"></a>\n",
    "\n",
    "## Inference through Variational Bayes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2016-02-10 19:43:32 INFO:vbLDA:[ITER] 0,\telapsed time:0.79,\tELBO:-478629.24\n",
      "2016-02-10 19:43:33 INFO:vbLDA:[ITER] 1,\telapsed time:0.78,\tELBO:-424352.68\n",
      "2016-02-10 19:43:34 INFO:vbLDA:[ITER] 2,\telapsed time:0.79,\tELBO:-380711.73\n",
      "2016-02-10 19:43:34 INFO:vbLDA:[ITER] 3,\telapsed time:0.76,\tELBO:-364218.72\n",
      "2016-02-10 19:43:35 INFO:vbLDA:[ITER] 4,\telapsed time:0.72,\tELBO:-357506.75\n",
      "2016-02-10 19:43:36 INFO:vbLDA:[ITER] 5,\telapsed time:0.69,\tELBO:-354117.34\n",
      "2016-02-10 19:43:37 INFO:vbLDA:[ITER] 6,\telapsed time:0.69,\tELBO:-352265.21\n",
      "2016-02-10 19:43:37 INFO:vbLDA:[ITER] 7,\telapsed time:0.69,\tELBO:-351168.75\n",
      "2016-02-10 19:43:38 INFO:vbLDA:[ITER] 8,\telapsed time:0.65,\tELBO:-350393.52\n",
      "2016-02-10 19:43:39 INFO:vbLDA:[ITER] 9,\telapsed time:0.65,\tELBO:-349864.68\n",
      "2016-02-10 19:43:39 INFO:vbLDA:[ITER] 10,\telapsed time:0.64,\tELBO:-349479.59\n",
      "2016-02-10 19:43:40 INFO:vbLDA:[ITER] 11,\telapsed time:0.66,\tELBO:-349231.45\n",
      "2016-02-10 19:43:40 INFO:vbLDA:[ITER] 12,\telapsed time:0.64,\tELBO:-349048.99\n",
      "2016-02-10 19:43:41 INFO:vbLDA:[ITER] 13,\telapsed time:0.64,\tELBO:-348919.67\n",
      "2016-02-10 19:43:42 INFO:vbLDA:[ITER] 14,\telapsed time:0.63,\tELBO:-348796.75\n",
      "2016-02-10 19:43:42 INFO:vbLDA:[ITER] 15,\telapsed time:0.65,\tELBO:-348698.18\n",
      "2016-02-10 19:43:43 INFO:vbLDA:[ITER] 16,\telapsed time:0.65,\tELBO:-348608.65\n",
      "2016-02-10 19:43:44 INFO:vbLDA:[ITER] 17,\telapsed time:0.64,\tELBO:-348538.82\n",
      "2016-02-10 19:43:44 INFO:vbLDA:[ITER] 18,\telapsed time:0.62,\tELBO:-348471.38\n",
      "2016-02-10 19:43:45 INFO:vbLDA:[ITER] 19,\telapsed time:0.63,\tELBO:-348418.05\n",
      "2016-02-10 19:43:46 INFO:vbLDA:[ITER] 20,\telapsed time:0.62,\tELBO:-348372.82\n",
      "2016-02-10 19:43:46 INFO:vbLDA:[ITER] 21,\telapsed time:0.63,\tELBO:-348327.48\n",
      "2016-02-10 19:43:47 INFO:vbLDA:[ITER] 22,\telapsed time:0.63,\tELBO:-348286.69\n",
      "2016-02-10 19:43:47 INFO:vbLDA:[ITER] 23,\telapsed time:0.63,\tELBO:-348257.43\n",
      "2016-02-10 19:43:48 INFO:vbLDA:[ITER] 24,\telapsed time:0.61,\tELBO:-348232.60\n",
      "2016-02-10 19:43:49 INFO:vbLDA:[ITER] 25,\telapsed time:0.63,\tELBO:-348203.76\n",
      "2016-02-10 19:43:49 INFO:vbLDA:[ITER] 26,\telapsed time:0.62,\tELBO:-348182.56\n",
      "2016-02-10 19:43:50 INFO:vbLDA:[ITER] 27,\telapsed time:0.62,\tELBO:-348160.58\n",
      "2016-02-10 19:43:51 INFO:vbLDA:[ITER] 28,\telapsed time:0.64,\tELBO:-348144.50\n",
      "2016-02-10 19:43:51 INFO:vbLDA:[ITER] 29,\telapsed time:0.65,\tELBO:-348122.57\n",
      "2016-02-10 19:43:52 INFO:vbLDA:[ITER] 30,\telapsed time:0.66,\tELBO:-348109.34\n",
      "2016-02-10 19:43:53 INFO:vbLDA:[ITER] 31,\telapsed time:0.63,\tELBO:-348098.76\n",
      "2016-02-10 19:43:53 INFO:vbLDA:[ITER] 32,\telapsed time:0.62,\tELBO:-348084.17\n",
      "2016-02-10 19:43:54 INFO:vbLDA:[ITER] 33,\telapsed time:0.61,\tELBO:-348071.97\n",
      "2016-02-10 19:43:54 INFO:vbLDA:[ITER] 34,\telapsed time:0.63,\tELBO:-348059.91\n",
      "2016-02-10 19:43:55 INFO:vbLDA:[ITER] 35,\telapsed time:0.62,\tELBO:-348051.82\n",
      "2016-02-10 19:43:56 INFO:vbLDA:[ITER] 36,\telapsed time:0.65,\tELBO:-348045.39\n",
      "2016-02-10 19:43:56 INFO:vbLDA:[ITER] 37,\telapsed time:0.60,\tELBO:-348034.56\n",
      "2016-02-10 19:43:57 INFO:vbLDA:[ITER] 38,\telapsed time:0.63,\tELBO:-348025.53\n",
      "2016-02-10 19:43:58 INFO:vbLDA:[ITER] 39,\telapsed time:0.61,\tELBO:-348018.32\n",
      "2016-02-10 19:43:58 INFO:vbLDA:[ITER] 40,\telapsed time:0.60,\tELBO:-348011.50\n",
      "2016-02-10 19:43:59 INFO:vbLDA:[ITER] 41,\telapsed time:0.62,\tELBO:-348008.20\n",
      "2016-02-10 19:43:59 INFO:vbLDA:[ITER] 42,\telapsed time:0.61,\tELBO:-348007.99\n",
      "2016-02-10 19:44:00 INFO:vbLDA:[ITER] 43,\telapsed time:0.62,\tELBO:-348007.58\n",
      "2016-02-10 19:44:01 INFO:vbLDA:[ITER] 44,\telapsed time:0.61,\tELBO:-348006.46\n",
      "2016-02-10 19:44:01 INFO:vbLDA:[ITER] 45,\telapsed time:0.63,\tELBO:-348003.09\n",
      "2016-02-10 19:44:02 INFO:vbLDA:[ITER] 46,\telapsed time:0.61,\tELBO:-347999.54\n",
      "2016-02-10 19:44:02 INFO:vbLDA:[ITER] 47,\telapsed time:0.61,\tELBO:-347995.14\n",
      "2016-02-10 19:44:03 INFO:vbLDA:[ITER] 48,\telapsed time:0.60,\tELBO:-347992.98\n",
      "2016-02-10 19:44:04 INFO:vbLDA:[ITER] 49,\telapsed time:0.59,\tELBO:-347990.23\n",
      "2016-02-10 19:44:04 INFO:vbLDA:[ITER] 50,\telapsed time:0.59,\tELBO:-347986.13\n",
      "2016-02-10 19:44:05 INFO:vbLDA:[ITER] 51,\telapsed time:0.59,\tELBO:-347984.36\n",
      "2016-02-10 19:44:05 INFO:vbLDA:[ITER] 52,\telapsed time:0.60,\tELBO:-347981.83\n",
      "2016-02-10 19:44:06 INFO:vbLDA:[ITER] 53,\telapsed time:0.59,\tELBO:-347980.00\n",
      "2016-02-10 19:44:07 INFO:vbLDA:[ITER] 54,\telapsed time:0.60,\tELBO:-347975.99\n",
      "2016-02-10 19:44:07 INFO:vbLDA:[ITER] 55,\telapsed time:0.58,\tELBO:-347973.46\n",
      "2016-02-10 19:44:08 INFO:vbLDA:[ITER] 56,\telapsed time:0.60,\tELBO:-347970.75\n",
      "2016-02-10 19:44:08 INFO:vbLDA:[ITER] 57,\telapsed time:0.59,\tELBO:-347970.34\n",
      "2016-02-10 19:44:09 INFO:vbLDA:[ITER] 58,\telapsed time:0.60,\tELBO:-347970.31\n",
      "2016-02-10 19:44:10 INFO:vbLDA:[ITER] 59,\telapsed time:0.59,\tELBO:-347970.25\n",
      "2016-02-10 19:44:10 INFO:vbLDA:[ITER] 60,\telapsed time:0.60,\tELBO:-347970.11\n",
      "2016-02-10 19:44:11 INFO:vbLDA:[ITER] 61,\telapsed time:0.65,\tELBO:-347969.67\n",
      "2016-02-10 19:44:12 INFO:vbLDA:[ITER] 62,\telapsed time:0.69,\tELBO:-347968.08\n",
      "2016-02-10 19:44:12 INFO:vbLDA:[ITER] 63,\telapsed time:0.67,\tELBO:-347967.16\n",
      "2016-02-10 19:44:13 INFO:vbLDA:[ITER] 64,\telapsed time:0.65,\tELBO:-347966.72\n",
      "2016-02-10 19:44:13 INFO:vbLDA:[ITER] 65,\telapsed time:0.63,\tELBO:-347965.37\n",
      "2016-02-10 19:44:14 INFO:vbLDA:[ITER] 66,\telapsed time:0.62,\tELBO:-347964.13\n",
      "2016-02-10 19:44:15 INFO:vbLDA:[ITER] 67,\telapsed time:0.62,\tELBO:-347964.13\n",
      "2016-02-10 19:44:15 INFO:vbLDA:[ITER] 68,\telapsed time:0.63,\tELBO:-347964.12\n",
      "2016-02-10 19:44:16 INFO:vbLDA:[ITER] 69,\telapsed time:0.63,\tELBO:-347964.11\n",
      "2016-02-10 19:44:17 INFO:vbLDA:[ITER] 70,\telapsed time:0.65,\tELBO:-347964.11\n",
      "2016-02-10 19:44:17 INFO:vbLDA:[ITER] 71,\telapsed time:0.65,\tELBO:-347964.10\n",
      "2016-02-10 19:44:18 INFO:vbLDA:[ITER] 72,\telapsed time:0.64,\tELBO:-347964.08\n",
      "2016-02-10 19:44:19 INFO:vbLDA:[ITER] 73,\telapsed time:0.62,\tELBO:-347964.06\n",
      "2016-02-10 19:44:19 INFO:vbLDA:[ITER] 74,\telapsed time:0.64,\tELBO:-347964.02\n",
      "2016-02-10 19:44:20 INFO:vbLDA:[ITER] 75,\telapsed time:0.62,\tELBO:-347963.94\n",
      "2016-02-10 19:44:20 INFO:vbLDA:[ITER] 76,\telapsed time:0.62,\tELBO:-347963.75\n",
      "2016-02-10 19:44:21 INFO:vbLDA:[ITER] 77,\telapsed time:0.62,\tELBO:-347963.15\n",
      "2016-02-10 19:44:22 INFO:vbLDA:[ITER] 78,\telapsed time:0.62,\tELBO:-347961.36\n",
      "2016-02-10 19:44:22 INFO:vbLDA:[ITER] 79,\telapsed time:0.64,\tELBO:-347960.89\n",
      "2016-02-10 19:44:23 INFO:vbLDA:[ITER] 80,\telapsed time:0.62,\tELBO:-347960.88\n",
      "2016-02-10 19:44:24 INFO:vbLDA:[ITER] 81,\telapsed time:0.61,\tELBO:-347960.86\n",
      "2016-02-10 19:44:24 INFO:vbLDA:[ITER] 82,\telapsed time:0.59,\tELBO:-347960.78\n",
      "2016-02-10 19:44:25 INFO:vbLDA:[ITER] 83,\telapsed time:0.64,\tELBO:-347960.45\n",
      "2016-02-10 19:44:25 INFO:vbLDA:[ITER] 84,\telapsed time:0.64,\tELBO:-347959.02\n",
      "2016-02-10 19:44:26 INFO:vbLDA:[ITER] 85,\telapsed time:0.64,\tELBO:-347958.29\n",
      "2016-02-10 19:44:27 INFO:vbLDA:[ITER] 86,\telapsed time:0.69,\tELBO:-347958.28\n",
      "2016-02-10 19:44:27 INFO:vbLDA:[ITER] 87,\telapsed time:0.64,\tELBO:-347958.28\n",
      "2016-02-10 19:44:28 INFO:vbLDA:[ITER] 88,\telapsed time:0.62,\tELBO:-347958.28\n",
      "2016-02-10 19:44:29 INFO:vbLDA:[ITER] 89,\telapsed time:0.61,\tELBO:-347958.27\n",
      "2016-02-10 19:44:29 INFO:vbLDA:[ITER] 90,\telapsed time:0.59,\tELBO:-347958.27\n",
      "2016-02-10 19:44:30 INFO:vbLDA:[ITER] 91,\telapsed time:0.59,\tELBO:-347958.26\n",
      "2016-02-10 19:44:30 INFO:vbLDA:[ITER] 92,\telapsed time:0.60,\tELBO:-347958.26\n",
      "2016-02-10 19:44:31 INFO:vbLDA:[ITER] 93,\telapsed time:0.63,\tELBO:-347958.25\n",
      "2016-02-10 19:44:32 INFO:vbLDA:[ITER] 94,\telapsed time:0.65,\tELBO:-347958.24\n",
      "2016-02-10 19:44:32 INFO:vbLDA:[ITER] 95,\telapsed time:0.66,\tELBO:-347958.23\n",
      "2016-02-10 19:44:33 INFO:vbLDA:[ITER] 96,\telapsed time:0.61,\tELBO:-347958.23\n",
      "2016-02-10 19:44:34 INFO:vbLDA:[ITER] 97,\telapsed time:0.59,\tELBO:-347958.22\n",
      "2016-02-10 19:44:34 INFO:vbLDA:[ITER] 98,\telapsed time:0.58,\tELBO:-347958.20\n",
      "2016-02-10 19:44:35 INFO:vbLDA:[ITER] 99,\telapsed time:0.59,\tELBO:-347958.19\n"
     ]
    }
   ],
   "source": [
    "# Seed NumPy's global RNG so the variational run is repeatable even when this\n",
    "# cell is executed on its own. NOTE(review): assumes ptm's vbLDA initialises\n",
    "# its parameters via np.random -- confirm against the ptm source.\n",
    "np.random.seed(0)\n",
    "\n",
    "# Stop 'vbLDA' log records from propagating to ancestor loggers.\n",
    "logger = logging.getLogger('vbLDA')\n",
    "logger.propagate = False\n",
    "\n",
    "# vbLDA is fit on the id/count representation rather than on `docs`.\n",
    "vbmodel = vbLDA(n_doc, n_voca, n_topic)\n",
    "vbmodel.fit(doc_ids, doc_cnt, max_iter=max_iter)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Print top 10 probability words for each topic"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Topic 0 :  share,stock,profit,would,offer,corp,earnings,per,dividend,first\n",
      "Topic 1 :  fed,price,trade,may,two,april,market,reserve,would,japan\n",
      "Topic 2 :  dollar,would,one,foreign,growth,last,trade,economic,week,rise\n",
      "Topic 3 :  loss,profit,corp,note,quarter,national,share,gain,one,first\n",
      "Topic 4 :  bank,market,week,days,rate,money,new,april,today,day\n",
      "Topic 5 :  quarter,first,tax,share,income,april,bank,dividend,record,may\n",
      "Topic 6 :  oil,quarter,first,gas,march,gold,february,price,earnings,last\n",
      "Topic 7 :  japan,dollar,trade,would,yen,dome,japanese,market,also,agreement\n",
      "Topic 8 :  nil,last,stocks,month,production,total,grain,crop,wheat,end\n",
      "Topic 9 :  share,corp,april,wheat,price,new,group,would,exchange,department\n"
     ]
    }
   ],
   "source": [
    "# List the 10 words get_top_words returns for each variational-Bayes topic,\n",
    "# one comma-separated line per topic.\n",
    "for topic_idx in range(n_topic):\n",
    "    words = get_top_words(vbmodel._lambda, voca, topic_idx, n_words=10)\n",
    "    print('Topic %d :  %s' % (topic_idx, ','.join(words)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.3"
  },
  "toc": {
   "toc_cell": true,
   "toc_number_sections": true,
   "toc_threshold": 4,
   "toc_window_display": true
  },
  "toc_position": {
   "left": "1120px",
   "right": "20px",
   "top": "120px",
   "width": "299px"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
